#read all csv file in diffrent dataframes
import pandas as pd
import os
# Labels applied to every yearly table: one column per calendar month followed
# by the two coordinate columns.
# NOTE(review): assumes each CSV has exactly these 14 columns in this order --
# confirm against the data files.
mon = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec','lat','lon']


def _load_precipitation(year):
    """Read ``precipitation_<year>.csv`` and relabel its columns with ``mon``.

    Args:
        year: Four-digit calendar year used to build the file name.

    Returns:
        The DataFrame read from the file, with ``mon`` as its column labels.

    NOTE(review): ``read_csv`` runs with its default header handling, so the
    first line of each file is consumed as a header before the columns are
    renamed -- confirm the files really start with a header row.
    """
    frame = pd.read_csv('precipitation_{}.csv'.format(year))
    frame.columns = mon
    return frame


# One table per year, 2003 through 2023, loaded by a single code path instead
# of 21 copy-pasted read/rename pairs.
_precipitation_frames = [_load_precipitation(year) for year in range(2003, 2024)]

# Keep the short per-year names used by the rest of the script
# (p3 = 2003, ..., p23 = 2023).
(p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
 p14, p15, p16, p17, p18, p19, p20, p21, p22, p23) = _precipitation_frames
import matplotlib.pyplot as plt
# Column-wise mean of each yearly table, with the 'lat' and 'lon' entries
# removed so that only the twelve monthly values remain.
monthly_averages2023 = p23.mean().drop(['lat','lon'])
monthly_averages2022 = p22.mean().drop(['lat','lon'])
monthly_averages2021 = p21.mean().drop(['lat','lon'])
monthly_averages2020 = p20.mean().drop(['lat','lon'])
monthly_averages2019 = p19.mean().drop(['lat','lon'])
monthly_averages2018 = p18.mean().drop(['lat','lon'])
monthly_averages2017 = p17.mean().drop(['lat','lon'])
monthly_averages2016 = p16.mean().drop(['lat','lon'])
monthly_averages2015 = p15.mean().drop(['lat','lon'])
monthly_averages2014 = p14.mean().drop(['lat','lon'])
monthly_averages2013 = p13.mean().drop(['lat','lon'])
monthly_averages2012 = p12.mean().drop(['lat','lon'])
monthly_averages2011 = p11.mean().drop(['lat','lon'])
monthly_averages2010 = p10.mean().drop(['lat','lon'])
monthly_averages2009 = p9.mean().drop(['lat','lon'])
monthly_averages2008 = p8.mean().drop(['lat','lon'])
monthly_averages2007 = p7.mean().drop(['lat','lon'])
monthly_averages2006 = p6.mean().drop(['lat','lon'])
monthly_averages2005 = p5.mean().drop(['lat','lon'])
monthly_averages2004 = p4.mean().drop(['lat','lon'])
monthly_averages2003 = p3.mean().drop(['lat','lon'])
#plot the data of each month value in graph point
# Quick visual check: one line chart per year for the three most recent years.
for recent_series in (monthly_averages2023, monthly_averages2022, monthly_averages2021):
    recent_series.plot(kind='line')
    plt.show()

# Put the yearly series side by side: rows are months, columns are the years
# 2003..2023 in chronological order.
yearly_series = [
    monthly_averages2003, monthly_averages2004, monthly_averages2005,
    monthly_averages2006, monthly_averages2007, monthly_averages2008,
    monthly_averages2009, monthly_averages2010, monthly_averages2011,
    monthly_averages2012, monthly_averages2013, monthly_averages2014,
    monthly_averages2015, monthly_averages2016, monthly_averages2017,
    monthly_averages2018, monthly_averages2019, monthly_averages2020,
    monthly_averages2021, monthly_averages2022, monthly_averages2023,
]
maindf = pd.concat(yearly_series, axis=1)
maindf.columns = range(2003,2024)
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# For every calendar month, average that month's row of maindf across all
# year columns.
ls = []
for month in months:
    ls.append(maindf.loc[month].mean())
maindf['avg_of_all'] = ls

# Plot the all-years average for each month.
lsa = maindf['avg_of_all']
lsa.plot(kind='line')
plt.show()

# Anomaly table: every column minus the all-years monthly average. The
# subtraction also hits the 'avg_of_all' column itself, so that column is
# still present in amamoly as the average minus itself.
sam = maindf['avg_of_all']
amamoly = maindf.subtract(sam, axis='index')

# One line per column of the anomaly table.
amamoly.plot()
plt.show()
# Collect the anomaly columns under string year keys, plus a column holding
# the month names, so the table can be melted into one long series.
data = {'index': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']}
for anomaly_year in range(2003, 2024):
    data[str(anomaly_year)] = amamoly[anomaly_year]
df = pd.DataFrame(data)

# Long format: one row per (month, year) pair with its anomaly in 'Value'.
df_long = pd.melt(df, id_vars=['index'], var_name='Year', value_name='Value')

# Build a timestamp from the 'Year' string and the abbreviated month name.
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')

# Raw anomaly time series: black line with shaded positive/negative areas.
# NOTE(review): positive values are shaded blue and negative red here, the
# reverse of the smoothed figures later in this file -- confirm which
# convention is intended.
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], df_long['Value'], marker='', linestyle='-', color='black')
for raw_mask, raw_color in ((df_long['Value'] > 0, 'blue'), (df_long['Value'] < 0, 'red')):
    plt.fill_between(df_long['Date'], df_long['Value'], where=raw_mask, color=raw_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel(' Precipitation mm/hr')
plt.grid(True)
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def moving_average(data, window_size):
    """Return the centred rolling mean of ``data``.

    Args:
        data: Object exposing pandas' ``rolling`` API (the script passes a
            Series).
        window_size: Number of consecutive samples in each window.

    Returns:
        ``data.rolling(window=window_size, center=True).mean()``. Because
        ``min_periods`` is left at its default, positions whose window is
        incomplete (the edges) are NaN.
    """
    return data.rolling(window=window_size, center=True).mean()


def kz_filter(data, window_size, iterations):
    """Smooth ``data`` by applying ``moving_average`` repeatedly.

    Args:
        data: Series to smooth; it is copied first and never modified.
        window_size: Window length passed to every moving-average pass.
        iterations: Number of passes; ``0`` returns an unchanged copy.

    Returns:
        The smoothed series. Each pass leaves the incomplete edge windows as
        NaN, so the NaN margin at both ends grows with ``iterations``.
    """
    result = data.copy()
    for _ in range(iterations):
        result = moving_average(result, window_size)
    return result


# Apply KZ filter
# Iterated moving average with a 5-sample window, three passes.
window_size, iterations = 5, 3

# Filtered columns are stored on a copy so df_long itself stays untouched.
df_longss = df_long.copy()
df_longss['Filtered_Value5'] = kz_filter(df_long['Value'], window_size, iterations)

# Black line for the filtered anomaly; red shading above zero, blue below.
kz5_dates = df_longss['Date']
kz5_values = df_longss['Filtered_Value5']
plt.figure(figsize=(18, 5))
plt.plot(kz5_dates, kz5_values, marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
for kz5_mask, kz5_color in ((kz5_values > 0, 'red'), (kz5_values < 0, 'blue')):
    plt.fill_between(kz5_dates, kz5_values, where=kz5_mask, color=kz5_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter with window size 5 ')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def moving_average(data, window_size):
    """Return the centred rolling mean of ``data``.

    Args:
        data: Object exposing pandas' ``rolling`` API (the script passes a
            Series).
        window_size: Number of consecutive samples in each window.

    Returns:
        ``data.rolling(window=window_size, center=True).mean()``. Because
        ``min_periods`` is left at its default, positions whose window is
        incomplete (the edges) are NaN.
    """
    return data.rolling(window=window_size, center=True).mean()


def kz_filter(data, window_size, iterations):
    """Smooth ``data`` by applying ``moving_average`` repeatedly.

    Args:
        data: Series to smooth; it is copied first and never modified.
        window_size: Window length passed to every moving-average pass.
        iterations: Number of passes; ``0`` returns an unchanged copy.

    Returns:
        The smoothed series. Each pass leaves the incomplete edge windows as
        NaN, so the NaN margin at both ends grows with ``iterations``.
    """
    result = data.copy()
    for _ in range(iterations):
        result = moving_average(result, window_size)
    return result


# Apply KZ filter
# Iterated moving average with a 3-sample window, three passes.
window_size, iterations = 3, 3
df_longss['Filtered_Value3'] = kz_filter(df_long['Value'], window_size, iterations)

# Black line for the filtered anomaly; red shading above zero, blue below.
kz3_dates = df_longss['Date']
kz3_values = df_longss['Filtered_Value3']
plt.figure(figsize=(18, 5))
plt.plot(kz3_dates, kz3_values, marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
for kz3_mask, kz3_color in ((kz3_values > 0, 'red'), (kz3_values < 0, 'blue')):
    plt.fill_between(kz3_dates, kz3_values, where=kz3_mask, color=kz3_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter with window size 3')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def moving_average(data, window_size):
    """Return the centred rolling mean of ``data``.

    Args:
        data: Object exposing pandas' ``rolling`` API (the script passes a
            Series).
        window_size: Number of consecutive samples in each window.

    Returns:
        ``data.rolling(window=window_size, center=True).mean()``. Because
        ``min_periods`` is left at its default, positions whose window is
        incomplete (the edges) are NaN.
    """
    return data.rolling(window=window_size, center=True).mean()


def kz_filter(data, window_size, iterations):
    """Smooth ``data`` by applying ``moving_average`` repeatedly.

    Args:
        data: Series to smooth; it is copied first and never modified.
        window_size: Window length passed to every moving-average pass.
        iterations: Number of passes; ``0`` returns an unchanged copy.

    Returns:
        The smoothed series. Each pass leaves the incomplete edge windows as
        NaN, so the NaN margin at both ends grows with ``iterations``.
    """
    result = data.copy()
    for _ in range(iterations):
        result = moving_average(result, window_size)
    return result


# Apply KZ filter
# Iterated moving average with a 7-sample window, three passes.
window_size = 7
iterations = 3
df_longss['Filtered_Value7'] = kz_filter(df_long['Value'], window_size, iterations)

# Black line for the filtered anomaly; red shading above zero, blue below.
plt.figure(figsize=(18, 5))
plt.plot(df_longss['Date'], df_longss['Filtered_Value7'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value7'], where=df_longss['Filtered_Value7'] > 0, color='red', alpha=0.6)
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value7'], where=df_longss['Filtered_Value7'] < 0, color='blue', alpha=0.6)

# The plotted series is built from the 2003..2023 anomaly columns, so the
# title states that range (it previously said 2021).
plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter with window size 7')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
from scipy.ndimage import gaussian_filter1d
# The filter runs over the samples in row order, so put the rows in
# chronological order first.
df_long = df_long.sort_values(by='Date')

# Gaussian smoothing of the anomaly values; sigma is the kernel's standard
# deviation.
sigma = 3
smoothed_values1 = gaussian_filter1d(df_long['Value'], sigma)

# Black line for the smoothed series; red shading above zero, blue below.
gauss3_dates = df_long['Date']
plt.figure(figsize=(18, 5))
plt.plot(gauss3_dates, smoothed_values1, color='black', label='Smoothed Value')
for gauss3_mask, gauss3_color in ((smoothed_values1 > 0, 'r'), (smoothed_values1 < 0, 'b')):
    plt.fill_between(gauss3_dates, smoothed_values1, where=gauss3_mask, color=gauss3_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing with sigma 3')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
from scipy.ndimage import gaussian_filter1d
# Keep the rows chronological before filtering along the series.
df_long = df_long.sort_values(by='Date')

# Gaussian smoothing with a wider kernel (standard deviation 5).
sigma = 5
smoothed_values2 = gaussian_filter1d(df_long['Value'], sigma)

# Black line for the smoothed series; red shading above zero, blue below.
gauss5_dates = df_long['Date']
plt.figure(figsize=(18, 5))
plt.plot(gauss5_dates, smoothed_values2, color='black', label='Smoothed Value')
for gauss5_mask, gauss5_color in ((smoothed_values2 > 0, 'r'), (smoothed_values2 < 0, 'b')):
    plt.fill_between(gauss5_dates, smoothed_values2, where=gauss5_mask, color=gauss5_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing with sigma 5')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
from scipy.ndimage import gaussian_filter1d
# Keep the rows chronological before filtering along the series.
df_long = df_long.sort_values(by='Date')

# Gaussian smoothing with the widest kernel used here (standard deviation 7).
sigma = 7
smoothed_values3 = gaussian_filter1d(df_long['Value'], sigma)

# Black line for the smoothed series; red shading above zero, blue below.
gauss7_dates = df_long['Date']
plt.figure(figsize=(18, 5))
plt.plot(gauss7_dates, smoothed_values3, color='black', label='Smoothed Value')
for gauss7_mask, gauss7_color in ((smoothed_values3 > 0, 'r'), (smoothed_values3 < 0, 'b')):
    plt.fill_between(gauss7_dates, smoothed_values3, where=gauss7_mask, color=gauss7_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing with sigma 7')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
from scipy.signal import convolve
def triangle_kernel(size):
    """Return a triangular smoothing kernel of length ``size`` that sums to 1.

    Weights rise 1, 2, 3, ... from both ends towards the centre, e.g.
    ``size=5`` gives ``[1, 2, 3, 2, 1] / 9``. Even sizes get a flat
    two-sample peak, e.g. ``size=4`` gives ``[1, 2, 2, 1] / 6``.

    Args:
        size: Number of kernel taps; must be at least 1.

    Returns:
        1-D float array of length ``size`` whose entries sum to 1.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    # Element-wise minimum of an ascending and a descending ramp. Unlike a
    # loop that writes from both ends and meets in the middle, this cannot
    # overwrite the centre taps when ``size`` is even.
    rising = np.arange(1, size + 1)
    kernel = np.minimum(rising, rising[::-1]).astype(float)
    return kernel / kernel.sum()
# Three-tap triangular kernel.
kernel_size = 3
kernel = triangle_kernel(kernel_size)

# Convolve the anomaly values with the kernel; mode='same' keeps the output
# the same length as the input series.
Tsmoothed_values = convolve(df_long['Value'], kernel, mode='same')

# Black line for the smoothed series; red shading above zero, blue below.
tri3_dates = df_long['Date']
plt.figure(figsize=(18, 5))
plt.plot(tri3_dates, Tsmoothed_values, color='black', label='Smoothed Value')
for tri3_mask, tri3_color in ((Tsmoothed_values > 0, 'r'), (Tsmoothed_values < 0, 'b')):
    plt.fill_between(tri3_dates, Tsmoothed_values, where=tri3_mask, color=tri3_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing with kernel size 3')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
from scipy.signal import convolve
def triangle_kernel(size):
    """Return a triangular smoothing kernel of length ``size`` that sums to 1.

    Weights rise 1, 2, 3, ... from both ends towards the centre, e.g.
    ``size=5`` gives ``[1, 2, 3, 2, 1] / 9``. Even sizes get a flat
    two-sample peak, e.g. ``size=4`` gives ``[1, 2, 2, 1] / 6``.

    Args:
        size: Number of kernel taps; must be at least 1.

    Returns:
        1-D float array of length ``size`` whose entries sum to 1.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    # Element-wise minimum of an ascending and a descending ramp. Unlike a
    # loop that writes from both ends and meets in the middle, this cannot
    # overwrite the centre taps when ``size`` is even.
    rising = np.arange(1, size + 1)
    kernel = np.minimum(rising, rising[::-1]).astype(float)
    return kernel / kernel.sum()
# Five-tap triangular kernel.
kernel_size = 5
kernel = triangle_kernel(kernel_size)

# Convolve the anomaly values with the kernel; mode='same' keeps the output
# the same length as the input series.
Tsmoothed_values5 = convolve(df_long['Value'], kernel, mode='same')

# Black line for the smoothed series; red shading above zero, blue below.
tri5_dates = df_long['Date']
plt.figure(figsize=(18, 5))
plt.plot(tri5_dates, Tsmoothed_values5, color='black', label='Smoothed Value')
for tri5_mask, tri5_color in ((Tsmoothed_values5 > 0, 'r'), (Tsmoothed_values5 < 0, 'b')):
    plt.fill_between(tri5_dates, Tsmoothed_values5, where=tri5_mask, color=tri5_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing with kernel size 5')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()
plt.grid(True)
plt.show()
from scipy.signal import convolve
def triangle_kernel(size):
    """Return a triangular smoothing kernel of length ``size`` that sums to 1.

    Weights rise 1, 2, 3, ... from both ends towards the centre, e.g.
    ``size=5`` gives ``[1, 2, 3, 2, 1] / 9``. Even sizes get a flat
    two-sample peak, e.g. ``size=4`` gives ``[1, 2, 2, 1] / 6``.

    Args:
        size: Number of kernel taps; must be at least 1.

    Returns:
        1-D float array of length ``size`` whose entries sum to 1.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    # Element-wise minimum of an ascending and a descending ramp. Unlike a
    # loop that writes from both ends and meets in the middle, this cannot
    # overwrite the centre taps when ``size`` is even.
    rising = np.arange(1, size + 1)
    kernel = np.minimum(rising, rising[::-1]).astype(float)
    return kernel / kernel.sum()
# Seven-tap triangular kernel.
kernel_size = 7
kernel = triangle_kernel(kernel_size)

# Convolve the anomaly values with the kernel; mode='same' keeps the output
# the same length as the input series.
Tsmoothed_values7 = convolve(df_long['Value'], kernel, mode='same')

# Black line for the smoothed series; red shading above zero, blue below.
# This figure has no line label and no legend.
tri7_dates = df_long['Date']
plt.figure(figsize=(18, 5))
plt.plot(tri7_dates, Tsmoothed_values7, color='black')
for tri7_mask, tri7_color in ((Tsmoothed_values7 > 0, 'r'), (Tsmoothed_values7 < 0, 'b')):
    plt.fill_between(tri7_dates, Tsmoothed_values7, where=tri7_mask, color=tri7_color, alpha=0.6)

plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing with kernel size 7')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')
plt.grid(True)
plt.show()
import pandas as pd
# Assuming amamoly is already defined
# Anomaly table without the 'avg_of_all' helper column: rows are months,
# columns are years.
pf = amamoly.drop('avg_of_all', axis=1)

# Mean of every run of three consecutive calendar months inside a year
# (Jan-Mar, Feb-Apr, ..., Oct-Dec): ten windows, each a Series indexed by year.
# NOTE(review): the windows never cross a year boundary, so each year yields
# only ten points and the plotted line jumps from window 10 of one year to
# window 1 of the next -- confirm this is intended.
_month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_span = 3
_window_means = []
for _start in range(len(_month_order) - _span + 1):
    _total = pf.loc[_month_order[_start]]
    for _name in _month_order[_start + 1:_start + _span]:
        _total = _total + pf.loc[_name]
    _window_means.append(_total / _span)

# One column per window (numbered 1..10), one row per year.
df = pd.concat(_window_means, axis=1)
df.columns = range(1, 11)

# Carry the year along as an ordinary column so melt can keep it.
df['Year'] = range(2003, 2024)

# Long format, ordered by year and then by window number.
df_melted = pd.melt(df, id_vars=["Year"], var_name="Month", value_name="Value")
a = df_melted.sort_values(by=['Year', 'Month'])

# The window number is reused as the calendar month of the timestamp.
a["Date"] = pd.to_datetime(a['Year'].astype(str) + '-' + a['Month'].astype(str), format='%Y-%m')

# Black line for the windowed means; red shading above zero, blue below.
plt.figure(figsize=(18, 5))
plt.plot(a['Date'], a['Value'], linestyle='-', color='black')
for _mask3, _color3 in ((a['Value'] > 0, 'r'), (a['Value'] < 0, 'b')):
    plt.fill_between(a['Date'], a['Value'], where=_mask3, color=_color3, alpha=0.6)

plt.title('3-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Date')
plt.ylabel('precepitation mm/hr')
plt.grid(True)
plt.show()
import pandas as pd
# Assuming amamoly is a DataFrame with appropriate data
# Anomaly table without the 'avg_of_all' helper column: rows are months,
# columns are years.
pf = amamoly.drop('avg_of_all', axis=1)

# Mean of every run of five consecutive calendar months inside a year
# (Jan-May, Feb-Jun, ..., Aug-Dec): eight windows, each a Series indexed by
# year.
# NOTE(review): the windows never cross a year boundary, so each year yields
# only eight points -- confirm this is intended.
_month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_span = 5
_window_means = []
for _start in range(len(_month_order) - _span + 1):
    _total = pf.loc[_month_order[_start]]
    for _name in _month_order[_start + 1:_start + _span]:
        _total = _total + pf.loc[_name]
    _window_means.append(_total / _span)

# One column per window (numbered 1..8), one row per year.
df = pd.concat(_window_means, axis=1)
df.columns = range(1, 9)

# Carry the year along as an ordinary column so melt can keep it.
df['Year'] = range(2003, 2024)

# Long format, ordered by year and then by window number.
df_melted = pd.melt(df, id_vars=["Year"], var_name="Month", value_name="Value")
b = df_melted.sort_values(by=['Year', 'Month'])

# The window number is reused as the calendar month of the timestamp.
b["Date"] = pd.to_datetime(b['Year'].astype(str) + '-' + b['Month'].astype(str), format='%Y-%m')

# Show the first rows as a sanity check.
print(b.head())
# Output of print(b.head()):
#     Year  Month     Value       Date
# 0   2003      1 -0.006881 2003-01-01
# 21  2003      2 -0.002396 2003-02-01
# 42  2003      3  0.004581 2003-03-01
# 63  2003      4  0.000430 2003-04-01
# 84  2003      5 -0.007025 2003-05-01
# Black line for the five-month windowed means held in b; red shading above
# zero, blue below.
_dates5 = b['Date']
_values5 = b['Value']
plt.figure(figsize=(18, 5))
plt.plot(_dates5, _values5, linestyle='-', color='black')
for _mask5, _color5 in ((_values5 > 0, 'r'), (_values5 < 0, 'b')):
    plt.fill_between(_dates5, _values5, where=_mask5, color=_color5, alpha=0.6)

plt.title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Date')
plt.ylabel('precepitation mm/hr')
plt.grid(True)
plt.show()
#create a data frame and read the DMI data
# Read the Dipole Mode Index table and parse its 'Date' column.
dmi = pd.read_csv('DMI.csv')
dmi['Date'] = pd.to_datetime(dmi['Date'], format='%Y-%m-%d')

plt.figure(figsize=(18, 5))
# NOTE(review): the value column of DMI.csv is read under the name 'PDO' --
# confirm that this is really how the index column is named in the file.
# The legend label says 'DMI' so it matches the figure title and y-axis.
plt.plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')

# Shade from the zero line: red where the index is >= 0, blue where it is < 0.
plt.fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

plt.title('Dipole Mode Index ')
plt.xlabel('Year')
plt.ylabel('DMI Value')
plt.grid(True)

# Finish the figure like the other index plots in this script: show the legend
# and display it now rather than leaving it open until a later plt.show().
plt.legend()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Load the data
# Read the ONI table and keep the rows after 2002. The explicit copy makes the
# filtered frame independent of the full table, so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
onia = pd.read_csv('oni_data.csv')
onia = onia[onia['Year'] > 2002].copy()

# Number the rows 1..12 cyclically to get a month for each record.
# NOTE(review): assumes the filtered rows start with the first month of 2003
# and continue with exactly one row per month -- confirm against oni_data.csv.
onia["Month"] = [i%12+1 for i in range(len(onia))]
onia.info()

plt.figure(figsize=(18, 5))

# Build a timestamp from the year and the derived month number.
onia["Date"] = pd.to_datetime(onia['Year'].astype(str) + '-' + onia['Month'].astype(str), format='%Y-%m')

# Blue line for the ONI anomaly.
plt.plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Shade from the zero line: red where the anomaly is >= 0, blue where it is < 0.
plt.fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

plt.title('Oceanic Niño Index')
plt.xlabel('Year')
plt.ylabel('ONI Value')
plt.grid(True)
plt.legend()
plt.show()
# Output of onia.info():
# <class 'pandas.core.frame.DataFrame'>
# Index: 257 entries, 636 to 892
# Data columns (total 5 columns):
#  #   Column  Non-Null Count  Dtype
# ---  ------  --------------  -----
#  0   SEAS    257 non-null    object
#  1   Year    257 non-null    int64
#  2   Total   257 non-null    float64
#  3   ANOM    257 non-null    float64
#  4   Month   257 non-null    int64
# dtypes: float64(2), int64(2), object(1)
# memory usage: 12.0+ KB
# Read the CSV file
# Read the PDO table and show its first rows.
PDO = pd.read_csv('PDO.csv')
print(PDO.head())

# Parse the 'Date' strings into timestamps.
PDO['Date'] = pd.to_datetime(PDO['Date'], format='%Y-%m-%d')

# Keep only rows whose year is after 2002 and before 2024.
_pdo_in_range = (PDO['Year'] > 2002) & (PDO['Year'] < 2024)
PDO = PDO[_pdo_in_range]

# Blue line for the index; shade from the zero line, red where the value is
# >= 0 and blue where it is < 0.
plt.figure(figsize=(18, 5))
plt.plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
for _pdo_mask, _pdo_color in ((PDO['PDO'] >= 0, 'red'), (PDO['PDO'] < 0, 'blue')):
    plt.fill_between(PDO['Date'], PDO['PDO'], 0, where=_pdo_mask, interpolate=True, color=_pdo_color, alpha=0.6)

plt.title('Pacific Decadal Oscillation')
plt.xlabel('Year')
plt.ylabel('PDO Value')
plt.grid(True)
plt.legend()
plt.show()
# Output of print(PDO.head()):
#    Year Month   PDO        Date
# 0  1854   Jan  0.11  1854-01-01
# 1  1854   Feb -0.24  1854-02-01
# 2  1854   Mar -0.40  1854-03-01
# 3  1854   Apr -0.44  1854-04-01
# 4  1854   May -0.54  1854-05-01
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# Assuming amamoly is already defined
# Collect the anomaly columns under string year keys, plus a column holding
# the month names.
data = {'index': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']}
data.update({str(year): amamoly[year] for year in range(2003, 2024)})
df = pd.DataFrame(data)

# Long format with a timestamp built from the year string and month name,
# ordered chronologically so the rolling window runs along time.
df_longs = pd.melt(df, id_vars=['index'], var_name='Year', value_name='Value')
df_longs['Date'] = pd.to_datetime(df_longs['Year'] + '-' + df_longs['index'], format='%Y-%b')
df_longs = df_longs.sort_values(by='Date')

# Trailing three-sample rolling mean; min_periods=1 lets the first rows
# average over however many samples are available.
df_longs['3_month_MA'] = df_longs['Value'].rolling(window=3, min_periods=1).mean()

# Black line for the rolling mean; red shading above zero, blue below.
_ma3_dates = df_longs['Date']
_ma3_values = df_longs['3_month_MA']
plt.figure(figsize=(18, 5))
plt.plot(_ma3_dates, _ma3_values, marker='', linestyle='-', color='black')
for _ma3_mask, _ma3_color in ((_ma3_values > 0, 'red'), (_ma3_values < 0, 'blue')):
    plt.fill_between(_ma3_dates, _ma3_values, where=_ma3_mask, color=_ma3_color, alpha=0.6, interpolate=True)

plt.title('3-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')
plt.grid(True)
plt.show()
# Rebuild the long-format anomaly series from the data dict, with a timestamp
# per row, ordered chronologically.
df = pd.DataFrame(data)
df_long = pd.melt(df, id_vars=['index'], var_name='Year', value_name='Value')
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')
df_long = df_long.sort_values(by='Date')

# Trailing five-sample rolling mean; min_periods=1 lets the first rows
# average over however many samples are available.
df_long['5_month_MA'] = df_long['Value'].rolling(window=5, min_periods=1).mean()

# Black line for the rolling mean; red shading above zero, blue below.
_ma5_dates = df_long['Date']
_ma5_values = df_long['5_month_MA']
plt.figure(figsize=(18, 5))
plt.plot(_ma5_dates, _ma5_values, marker='', linestyle='-', color='black')
for _ma5_mask, _ma5_color in ((_ma5_values > 0, 'r'), (_ma5_values < 0, 'b')):
    plt.fill_between(_ma5_dates, _ma5_values, where=_ma5_mask, color=_ma5_color, alpha=0.6)

plt.title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')
plt.grid(True)
plt.show()
# Trailing seven-sample rolling mean of the anomaly series; min_periods=1 lets
# the first rows average over however many samples are available.
df_long['7_month_MA'] = df_long['Value'].rolling(window=7, min_periods=1).mean()

# Black line for the rolling mean; red shading above zero, blue below.
_ma7_dates = df_long['Date']
_ma7_values = df_long['7_month_MA']
plt.figure(figsize=(18, 5))
plt.plot(_ma7_dates, _ma7_values, marker='', linestyle='-', color='black')
for _ma7_mask, _ma7_color in ((_ma7_values > 0, 'r'), (_ma7_values < 0, 'b')):
    plt.fill_between(_ma7_dates, _ma7_values, where=_ma7_mask, color=_ma7_color, alpha=0.6)

plt.title('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')
plt.grid(True)
plt.show()
def _plot_index_panel(ax, dates, values, label, title, xlabel, ylabel, **line_kwargs):
    """Draw one climate-index panel on ``ax``.

    Plots ``values`` against ``dates`` as a blue line, shades the area between
    the line and zero (red where ``values >= 0``, blue where ``values < 0``),
    then sets the title, axis labels and grid.

    Args:
        ax: Matplotlib axes to draw on.
        dates: x values of the series.
        values: y values of the series.
        label: Legend label for the line.
        title: Panel title.
        xlabel: x-axis label.
        ylabel: y-axis label.
        **line_kwargs: Extra keyword arguments forwarded to ``ax.plot``.
    """
    ax.plot(dates, values, color='blue', label=label, **line_kwargs)
    ax.fill_between(dates, values, 0, where=(values >= 0), interpolate=True, color='red', alpha=0.6)
    ax.fill_between(dates, values, 0, where=(values < 0), interpolate=True, color='blue', alpha=0.6)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid(True)


# One four-panel figure per KZ window size (3, 5, 7): DMI, the KZ-filtered
# precipitation anomaly, ONI and PDO stacked vertically for comparison.
for kz_window in (3, 5, 7):
    fig, axs = plt.subplots(4, 1, figsize=(18, 10))

    # Panel 1: Dipole Mode Index.
    # NOTE(review): the DMI values are read from a column named 'PDO' --
    # confirm against DMI.csv. The legend label says 'DMI' to match the panel.
    _plot_index_panel(axs[0], dmi['Date'], dmi['PDO'], label='DMI',
                      title='Dipole Mode Index', xlabel='Date', ylabel='DMI Value',
                      alpha=0.7)
    axs[0].legend()

    # Panel 2: KZ-filtered anomaly for this window size; red shading above
    # zero, blue below.
    kz_column = 'Filtered_Value{}'.format(kz_window)
    axs[1].plot(df_longss['Date'], df_longss[kz_column], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
    axs[1].fill_between(df_longss['Date'], df_longss[kz_column], where=df_longss[kz_column] > 0, color='red', alpha=0.6)
    axs[1].fill_between(df_longss['Date'], df_longss[kz_column], where=df_longss[kz_column] < 0, color='blue', alpha=0.6)
    # The filtered series covers the 2003..2023 anomaly columns, so the title
    # states that range (it previously said 2021).
    axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with KZ Filter {}'.format(kz_window))
    axs[1].set_xlabel('Year')
    axs[1].set_ylabel('precepitation mm/hr')
    axs[1].grid(True)
    axs[1].legend()

    # Panel 3: Oceanic Niño Index.
    _plot_index_panel(axs[2], onia['Date'], onia['ANOM'], label='ONI',
                      title='Oceanic Niño Index', xlabel='Year', ylabel='ONI Value',
                      marker='')

    # Panel 4: Pacific Decadal Oscillation.
    _plot_index_panel(axs[3], PDO['Date'], PDO['PDO'], label='PDO',
                      title='Pacific Decadal Oscillation', xlabel='Year', ylabel='PDO Value',
                      alpha=0.7)

    # Adjust the layout so the stacked panels do not overlap, then display.
    plt.tight_layout()
    plt.show()
# Four stacked panels: DMI, the smoothed_values1 precipitation series
# (titled "Gaussian Smoothing 3"), ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Precipitation panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], smoothed_values1, where=smoothed_values1 > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values1, where=smoothed_values1 < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 3', 'Year', 'precipitation mm/hr'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, the smoothed_values2 precipitation series
# (titled "Gaussian Smoothing 5"), ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], smoothed_values2, color='black', label='Smoothed Value')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Precipitation panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], smoothed_values2, where=smoothed_values2 > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values2, where=smoothed_values2 < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 5', 'Year', 'precipitation mm/hr'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, the smoothed_values3 precipitation series
# (titled "Gaussian Smoothing 7"), ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], smoothed_values3, color='black', label='Smoothed Value')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Precipitation panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], smoothed_values3, where=smoothed_values3 > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values3, where=smoothed_values3 < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 7', 'Year', 'precipitation mm/hr'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, the Tsmoothed_values precipitation series
# (titled "Triangle Smoothing 3"), ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], Tsmoothed_values, color='black')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Precipitation panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], Tsmoothed_values, where=Tsmoothed_values > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values, where=Tsmoothed_values < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 3', 'Year', 'precipitation mm/hr'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, the Tsmoothed_values5 precipitation series
# (titled "Triangle Smoothing 5"), ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], Tsmoothed_values5, color='black')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Precipitation panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], Tsmoothed_values5, where=Tsmoothed_values5 > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values5, where=Tsmoothed_values5 < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 5', 'Year', 'precipitation mm/hr'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, the Tsmoothed_values7 precipitation series
# (titled "Triangle Smoothing 7"), ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], Tsmoothed_values7, color='black')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Precipitation panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], Tsmoothed_values7, where=Tsmoothed_values7 > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values7, where=Tsmoothed_values7 < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 7', 'Year', 'precipitation mm/hr'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, the 'Value' column of frame b (titled as a 5-month
# moving average), ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(b['Date'], b['Value'], linestyle='-', color='black')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Precipitation panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(b['Date'], b['Value'], where=b['Value'] > 0, color='r', alpha=0.6)
axs[1].fill_between(b['Date'], b['Value'], where=b['Value'] < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023', 'Year', 'precipitation mm/hr'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, df_long['5_month_MA'], ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], df_long['5_month_MA'], linestyle='-', color='black')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Moving-average panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023', 'Year', 'Value'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: DMI, df_long['7_month_MA'], ONI and PDO.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Line traces. The dmi frame keeps its values in a column named 'PDO'; the legend
# entry is 'DMI' so that it agrees with the panel title (it used to read 'PDO').
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='DMI')
axs[1].plot(df_long['Date'], df_long['7_month_MA'], linestyle='-', color='black')
axs[2].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Index panels: shade between the curve and zero, red where >= 0 and blue where < 0.
for _panel, _dates, _series in (
    (axs[0], dmi['Date'], dmi['PDO']),
    (axs[2], onia['Date'], onia['ANOM']),
    (axs[3], PDO['Date'], PDO['PDO']),
):
    _panel.fill_between(_dates, _series, 0, where=(_series >= 0), interpolate=True, color='red', alpha=0.6)
    _panel.fill_between(_dates, _series, 0, where=(_series < 0), interpolate=True, color='blue', alpha=0.6)

# Moving-average panel: same colour convention, strict comparisons, no interpolation.
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] < 0, color='b', alpha=0.6)

# One (title, xlabel, ylabel) entry per panel, top to bottom.
_panel_text = (
    ('Dipole Mode Index', 'Date', 'DMI Value'),
    ('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023', 'Year', 'Value'),
    ('Oceanic Niño Index', 'Year', 'ONI Value'),
    ('Pacific Decadal Oscillation', 'Year', 'PDO Value'),
)
for _panel, (_title, _xlabel, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel(_xlabel)
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Only the DMI panel shows a legend.
axs[0].legend()

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Two stacked panels: df_long['7_month_MA'] on top, ONI underneath.
fig, axs = plt.subplots(2, 1, figsize=(18, 5))

# Line traces for both panels.
axs[0].plot(df_long['Date'], df_long['7_month_MA'], linestyle='-', color='black')
axs[1].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')

# Moving-average panel: red where the series is above zero, blue where below.
for _keep, _colour in ((df_long['7_month_MA'] > 0, 'r'), (df_long['7_month_MA'] < 0, 'b')):
    axs[0].fill_between(df_long['Date'], df_long['7_month_MA'], where=_keep, color=_colour, alpha=0.6)

# ONI panel: shade towards zero, interpolating at the sign changes.
for _keep, _colour in ((onia['ANOM'] >= 0, 'red'), (onia['ANOM'] < 0, 'blue')):
    axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=_keep, interpolate=True, color=_colour, alpha=0.6)

# Titles, axis labels and gridlines, top panel first.
_panel_text = (
    ('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023', 'Value'),
    ('Oceanic Niño Index', 'ONI Value'),
)
for _panel, (_title, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel('Year')
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Four stacked panels: ONI on top, then the 7-month MA, the 5-month MA and the
# KZ-filtered (Filtered_Value5) precipitation series.
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Panel 0: Oceanic Niño Index, shaded towards zero (red >= 0, blue < 0).
axs[0].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')
axs[0].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Oceanic Niño Index')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('ONI Value')
axs[0].grid(True)

# Panels 1-3: one smoothed precipitation series each, drawn identically.
# The y-label spelling is corrected here (it used to read 'Presispitation').
for _panel, _frame, _column, _title in (
    (axs[1], df_long, '7_month_MA',
     '7-Month Moving Average of Monthly Time Series Data from 2003 to 2023'),
    (axs[2], df_long, '5_month_MA',
     '5-Month Moving Average of Monthly Time Series Data from 2003 to 2023'),
    (axs[3], df_longss, 'Filtered_Value5',
     'Monthly Time Series Data from 2003 to 2021 with KZ Filter 5'),
):
    _panel.plot(_frame['Date'], _frame[_column], linestyle='-', color='black')
    _panel.fill_between(_frame['Date'], _frame[_column], where=_frame[_column] > 0, color='r', alpha=0.6)
    _panel.fill_between(_frame['Date'], _frame[_column], where=_frame[_column] < 0, color='b', alpha=0.6)
    _panel.set_title(_title)
    _panel.set_xlabel('Year')
    _panel.set_ylabel('Precipitation in mm/hr')
    _panel.grid(True)

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Two stacked panels: df_longss['Filtered_Value5'] on top, ONI underneath.
fig, axs = plt.subplots(2, 1, figsize=(18, 5))

# Line traces for both panels.
axs[0].plot(df_longss['Date'], df_longss['Filtered_Value5'], linestyle='-', color='black')
axs[1].plot(onia['Date'], onia['ANOM'], color='blue', marker='', label='ONI')

# KZ panel: red where the series is above zero, blue where below.
for _keep, _colour in ((df_longss['Filtered_Value5'] > 0, 'r'), (df_longss['Filtered_Value5'] < 0, 'b')):
    axs[0].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=_keep, color=_colour, alpha=0.6)

# ONI panel: shade towards zero, interpolating at the sign changes.
for _keep, _colour in ((onia['ANOM'] >= 0, 'red'), (onia['ANOM'] < 0, 'blue')):
    axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=_keep, interpolate=True, color=_colour, alpha=0.6)

# Titles, axis labels and gridlines, top panel first.
_panel_text = (
    ('Monthly Time Series Data from 2003 to 2021 with KZ Filter 5', 'Value'),
    ('Oceanic Niño Index', 'ONI Value'),
)
for _panel, (_title, _ylabel) in zip(axs, _panel_text):
    _panel.set_title(_title)
    _panel.set_xlabel('Year')
    _panel.set_ylabel(_ylabel)
    _panel.grid(True)

# Adjust layout to prevent overlap, then render.
plt.tight_layout()
plt.show()
# Gather the climate indices and every smoothed precipitation series as columns of
# one frame (dfss) so they can be correlated against each other below.
p = PDO['PDO'].values
# NOTE(review): this binds dfss to the SAME object as df_long (no copy), so every
# column added to dfss below is also added to df_long — confirm that is intended.
dfss = df_long
dfss['PDO'] = p
# Bare expression: only has a visible effect as the last statement of a notebook cell.
dfss.head()
# Restrict ONI to rows with Year < 2024 before copying its values across.
onia = onia[onia['Year'] < 2024]
k = onia['ANOM'].values
dfss['ONI'] = k
# The dmi frame stores its values in a column named 'PDO'.
r = dmi['PDO'].values
dfss['DMI'] = r
s= df_longs['3_month_MA'].values
dfss['3_month_MA'] = s
# All assignments in this block are positional (.values / raw arrays); they assume
# every source has the same length and row order as dfss — TODO confirm.
dfss["triangular_smoothing3"] = Tsmoothed_values
dfss["triangular_smoothing5"] = Tsmoothed_values5
dfss["triangular_smoothing7"] = Tsmoothed_values7
dfss["gaussian_smoothing3"] = smoothed_values1
dfss["gaussian_smoothing5"] = smoothed_values2
dfss["gaussian_smoothing7"] = smoothed_values3
# KZ-filtered series come from df_longss, renamed Filtered_ValueN -> KZ_FilterN.
q = df_longss['Filtered_Value3'].values
dfss['KZ_Filter3'] = q
z =df_longss['Filtered_Value5'].values
dfss['KZ_Filter5'] = z
x = df_longss['Filtered_Value7'].values
dfss['KZ_Filter7'] = x
# Because dfss is df_long, these two assignments write each column back onto itself.
sz = df_long['7_month_MA'].values
dfss['7_month_MA'] = sz
l= df_long['5_month_MA'].values
dfss['5_month_MA'] = l
# Correlation (Series.corr, Pearson by default) of every smoothed precipitation
# series with each climate index. Each value is printed so that nothing is silently
# discarded when the file runs as a script; the print order is PDO, ONI, DMI for
# each series in turn.
_climate_indices = ('PDO', 'ONI', 'DMI')
_smoothed_series = (
    '5_month_MA',
    'triangular_smoothing3',
    'triangular_smoothing5',
    'triangular_smoothing7',
    'gaussian_smoothing3',
    'gaussian_smoothing5',
    'gaussian_smoothing7',
    'KZ_Filter3',
    'KZ_Filter5',
    'KZ_Filter7',
    '7_month_MA',
    '3_month_MA',
)
for _series_name in _smoothed_series:
    for _index_name in _climate_indices:
        print(dfss[_series_name].corr(dfss[_index_name]))

# Results recorded from the original notebook run (columns: PDO, ONI, DMI):
#   5_month_MA             -0.2808645089482622   -0.5013071599053801   -0.12713524164925252
#   triangular_smoothing3  -0.23292069190042325  -0.3158792688808798   -0.019553277495745515
#   triangular_smoothing5  -0.25420245535814556  -0.3916778670235601   -0.01927696298063789
#   triangular_smoothing7  -0.2672400738747559   -0.4405282164520356   -0.028733438851717998
#   gaussian_smoothing3    -0.32840967667516635  -0.4782639689582464   -0.021099686013068688
#   gaussian_smoothing5    -0.41347071336436747  -0.4807726970923461    0.00596374437328528
#   gaussian_smoothing7    -0.48405130917963196  -0.46509087553090533   0.03458687042512412
#   KZ_Filter3             -0.25986527207146837  -0.4690295493106496   -0.05338746597766723
#   KZ_Filter5             -0.30941456212551105  -0.5315598400214587   -0.06125545031912268
#   KZ_Filter7             -0.3691839805865878   -0.5366000377968919   -0.04103410097909327
#   7_month_MA             -0.36821761927431373  -0.5409307518077192   -0.11228419765427412
#   3_month_MA             -0.21963358569688893  -0.37715552233340716  -0.0687408568705955
import scipy.stats as stats

# Pearson correlation and p-value of the 5-month moving average against ONI.
correlation, p_value = stats.pearsonr(dfss['5_month_MA'], dfss['ONI'])
print(f'Correlation between 5-month moving average and ONI: {correlation:.2f}', f'P-value: {p_value}')
# Recorded output:
#   Correlation between 5-month moving average and ONI: -0.50 P-value: 1.9287503609351303e-17

# Same test for the 7-month moving average. It is computed once and reported in
# both formats the notebook used; `correlation` and `p_value` end up holding the
# 7-month result, as before.
correlation, p_value = stats.pearsonr(dfss['7_month_MA'], dfss['ONI'])
print(f'Correlation between 7-month moving average and ONI: {correlation:.2f}', f'P-value: {p_value}')
print(f"Pearson Correlation Coefficient: {correlation}")
print(f"P-Value: {p_value}")
# Recorded output:
#   Correlation between 7-month moving average and ONI: -0.54 P-value: 1.4893193314618848e-20
#   Pearson Correlation Coefficient: -0.5409307518077191 P-Value: 1.4893193314618848e-20
# Correlation heat map over every numeric column gathered in dfss.
import seaborn as sns

# Year, index and Date are dropped before the correlation matrix is computed.
dfsss = dfss.drop(columns=['Year', 'index', 'Date'])
_corr_matrix = dfsss.corr()

plt.figure(figsize=(18, 10))
sns.heatmap(_corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()

# Persist the full dfss frame (dropped columns included) without the row index.
dfss.to_csv('dss.csv', index=False)
# Start a frame for the seasonal analysis, seeded with the 7-month moving average.
seasonal = pd.DataFrame()
seasonal["7_month_MA"] = dfss["7_month_MA"].values

print(onia.head())
# Recorded output:
#        SEAS  Year  Total  ANOM  Month        Date
#   636   DJF  2003  27.51  0.92      1  2003-01-01
#   637   JFM  2003  27.41  0.63      2  2003-02-01
#   638   FMA  2003  27.58  0.38      3  2003-03-01
#   639   MAM  2003  27.56 -0.04      4  2003-04-01
#   640   AMJ  2003  27.48 -0.26      5  2003-05-01

print(seasonal.head())
# Recorded output:
#      7_month_MA
#   0   -0.006448
#   1    0.004238
#   2    0.004613
#   3   -0.001631
#   4   -0.006881
import pandas as pd
import numpy as np

# Rebuild the monthly precipitation series on an explicit 2003-2023 calendar.
# NOTE(review): freq='M' produces month-end stamps, and pandas >= 2.2 deprecates
# this alias in favour of 'ME'; it is left unchanged for older pandas versions.
dates = pd.date_range(start='2003-01-01', end='2023-12-31', freq='M')
precip_data = dfss["Value"].values
df_precip = pd.DataFrame({
    'Date': dates,
    'Precipitation': precip_data,
})

# 7-month trailing mean; min_periods=1 keeps the first six months instead of NaN.
df_precip['7_month_MA'] = df_precip['Precipitation'].rolling(window=7, min_periods=1).mean()

# Long-term mean of the smoothed series for each calendar month (index 1..12).
df_precip['Month'] = df_precip['Date'].dt.month
monthly_averages = df_precip.groupby('Month')['7_month_MA'].mean()

# Broadcast the monthly climatology back onto every row via an index lookup,
# then subtract it to obtain the anomaly.
df_precip['Monthly_Average'] = df_precip['Month'].map(monthly_averages)
df_precip['Anomaly'] = df_precip['7_month_MA'] - df_precip['Monthly_Average']

# NOTE(review): the curve drawn (and labelled 'Anomaly') is the '7_month_MA'
# column, not the 'Anomaly' column computed above — confirm which one is intended.
plt.figure(figsize=(12, 6))
plt.plot(df_precip['Date'], df_precip['7_month_MA'], label='Anomaly', color='blue')
plt.fill_between(df_precip['Date'], df_precip['7_month_MA'], 0, where=df_precip['7_month_MA'] >= 0, color='red', alpha=0.6)
plt.fill_between(df_precip['Date'], df_precip['7_month_MA'], 0, where=df_precip['7_month_MA'] < 0, color='blue', alpha=0.6)
plt.title('7-Month Moving Average Anomalies in Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm)')
plt.axhline(0, color='red', linestyle='--', linewidth=0.8)  # Zero anomaly line
plt.legend()
plt.grid(True)
plt.show()

# Correlation of the rebuilt 7-month mean with ONI, printed so that the value is
# not discarded when the file runs as a script.
print(df_precip['7_month_MA'].corr(dfss['ONI']))
# Recorded output: -0.5409307518077192
def get_season(month):
    """Return the three-letter season code for a calendar month number.

    Args:
        month: Calendar month as an integer, 1 (January) to 12 (December).

    Returns:
        'DJF' for months 12, 1, 2; 'MAM' for 3-5; 'JJA' for 6-8;
        'SON' for 9-11. Any other value yields None.
    """
    if month in (12, 1, 2):
        return 'DJF'  # Winter
    if month in (3, 4, 5):
        return 'MAM'  # Spring
    if month in (6, 7, 8):
        return 'JJA'  # Summer
    if month in (9, 10, 11):
        return 'SON'  # Autumn
    # Not a valid month number: keep the historical "no season" result.
    return None
# Label every row with the season code of its month.
df_precip['Season'] = df_precip['Date'].dt.month.apply(get_season)

# Mean anomaly per (calendar year, season), pivoted so that years are rows and
# seasons are columns.
# NOTE(review): the grouping key is the calendar year, so the DJF value for
# year Y averages January, February and December of Y (not Dec of Y-1) -
# confirm this is the intended winter definition.
seasonal_anomalies1 = df_precip.groupby([df_precip['Date'].dt.year, 'Season'])['Anomaly'].mean().unstack()
print(seasonal_anomalies1.head())

# unstack() orders the season columns alphabetically (DJF, JJA, MAM, SON);
# select them in chronological order instead. The selection already carries
# the data, and .copy() makes the result an independent frame rather than a
# slice of seasonal_anomalies1 that would warn when written to.
column_names = ["DJF", "MAM", "JJA", "SON"]
seasonal_anomalies = seasonal_anomalies1[column_names].copy()
print(seasonal_anomalies.head())
Season DJF JJA MAM SON Date 2003 -0.002738 0.001610 -0.000780 -0.006298 2004 -0.016440 0.000585 -0.009456 -0.002005 2005 -0.006243 -0.006555 -0.011011 0.001420 2006 0.003661 0.001374 0.017203 0.000930 2007 0.000027 0.007538 -0.008563 0.021224 Season DJF MAM JJA SON Date 2003 -0.002738 -0.000780 0.001610 -0.006298 2004 -0.016440 -0.009456 0.000585 -0.002005 2005 -0.006243 -0.011011 -0.006555 0.001420 2006 0.003661 0.017203 0.001374 0.000930 2007 0.000027 -0.008563 0.007538 0.021224
# Grouped bar chart of seasonal_anomalies: one group of bars per row (year),
# one bar per column (season).
seasonal_anomalies.plot(kind='bar', figsize=(18, 6), width=0.8)
plt.title('Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm)')
plt.legend(title='Season')
# Horizontal grid lines only.
plt.grid(axis='y', linestyle='--', linewidth=0.7)
plt.show()
# Reduce dfss to the columns that are not listed below. 'ONI' is not in the
# list, so kss already carries it; re-assigning it from dfss afterwards was a
# no-op and is not needed.
kss = dfss.drop(columns=[
    'Year', 'index', 'Value', '5_month_MA', 'PDO', 'DMI',
    '3_month_MA', '7_month_MA',
    'triangular_smoothing3', 'triangular_smoothing5', 'triangular_smoothing7',
    'gaussian_smoothing3', 'gaussian_smoothing5', 'gaussian_smoothing7',
    'KZ_Filter3', 'KZ_Filter5', 'KZ_Filter7',
])
print(kss.head())
Date ONI 0 2003-01-01 0.92 1 2003-02-01 0.63 2 2003-03-01 0.38 3 2003-04-01 -0.04 4 2003-05-01 -0.26
# Attach the ONI index to the precipitation frame.
# df_precip['Date'] holds month-end timestamps while kss['Date'] holds
# month-start ones (see the printed heads of both frames), so an exact merge
# on 'Date' matches no row and leaves ONI entirely NaN. Both sides are
# therefore keyed on the first day of their calendar month before merging.
df_full = (
    df_precip
    .assign(_month=df_precip['Date'].dt.to_period('M').dt.to_timestamp())
    .merge(
        kss.assign(
            _month=pd.to_datetime(kss['Date']).dt.to_period('M').dt.to_timestamp()
        )[['_month', 'ONI']],
        on='_month',
        how='left',
    )
    .drop(columns='_month')  # helper key only; keep the original column set
)
print(df_full.head())

# Pearson correlation between anomaly and ONI within each season. The two
# columns are selected explicitly so the grouping column is not handed to the
# applied function.
correlations = df_full.groupby('Season')[['Anomaly', 'ONI']].apply(
    lambda season_rows: season_rows['Anomaly'].corr(season_rows['ONI'])
)
print(correlations)
Date Precipitation 7_month_MA Month Monthly_Average Anomaly \ 0 2003-01-31 -0.006448 -0.006448 1 -0.000579 -0.005868 1 2003-02-28 0.014923 0.004238 2 0.000076 0.004162 2 2003-03-31 0.005363 0.004613 3 -0.000639 0.005252 3 2003-04-30 -0.020362 -0.001631 4 -0.000380 -0.001250 4 2003-05-31 -0.027884 -0.006881 5 -0.000540 -0.006342 Season ONI 0 DJF NaN 1 DJF NaN 2 MAM NaN 3 MAM NaN 4 MAM NaN Season DJF -0.556773 JJA -0.428243 MAM -0.474263 SON -0.618290 dtype: float64
# Largest and smallest value of the 7-month moving average ...
print(df_full["7_month_MA"].max())
print(df_full["7_month_MA"].min())
# ... and of the ONI index (max and min on one line).
print(dfss["ONI"].max(),dfss["ONI"].min())
# Emits an empty line.
print()
0.03918827792594443 -0.029785487185765486 2.64 -1.64
# Bar chart of the 'SON' column of seasonal_anomalies (one bar per row).
autumn_anomalies = seasonal_anomalies['SON'] # 'SON' is the column holding the autumn anomalies
plt.figure(figsize=(15,8)) # Create the figure the bars are drawn into
autumn_anomalies.plot(kind='bar', color='orange') # Orange bars for autumn
plt.title('Autumn (SON) Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm)')
plt.axhline(0, color='black', linestyle='--', linewidth=1) # Zero anomaly line for reference
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
# Keep only the rows of onia whose 'SEAS' label is 'SON'
Autumn_ENO = onia[onia['SEAS'] == 'SON']
# Distinct years present in that subset; used as x-tick positions below
unique_years = Autumn_ENO['Year'].unique()
# Bar chart of the 'ANOM' column against 'Year'
plt.figure(figsize=(15, 8))
plt.bar(Autumn_ENO['Year'], Autumn_ENO['ANOM'], color='blue')
plt.title('Oceanic Niño Index in Autumn (SON) Seasons')
plt.xlabel('Year')
plt.ylabel('ONI Value')
plt.grid(True)
# One tick per year, rotated so the labels do not overlap
plt.xticks(unique_years,rotation =90)
plt.show()
# Two stacked panels: autumn ONI on top, autumn precipitation anomalies below.
fig, axs = plt.subplots(2, 1, figsize=(18, 10))
# Top panel: 'ANOM' of the SON rows against 'Year'
axs[0].bar(Autumn_ENO['Year'], Autumn_ENO['ANOM'], color='blue')
axs[0].set_title('Oceanic Niño Index in Autumn (SON) Seasons')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('ONI Value')
axs[0].grid(True)
axs[0].set_xticks(unique_years)
axs[0].tick_params(axis='x', rotation=90)
# Bottom panel: the 'SON' anomaly column drawn through pandas onto axs[1]
autumn_anomalies.plot(kind='bar', ax=axs[1], color='orange') # Orange bars for autumn
axs[1].set_title('Autumn (SON) Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Anomaly (mm)')
axs[1].axhline(0, color='black', linestyle='--', linewidth=1) # Zero anomaly line for reference
axs[1].grid(axis='y', linestyle='--', alpha=0.7)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# 1. Collect the ONI index and the 7-month moving average from df_full into a
#    date-indexed frame.
data = pd.DataFrame({
    'Date': df_full['Date'],
    'ENSO': df_full["ONI"],
    'Precipitation_MA7': df_full['7_month_MA']
})
data.set_index('Date', inplace=True)

# 2. Plot both series. The values come from df_full, not from a simulation,
#    so the title no longer calls them "Simulated".
plt.figure(figsize=(14, 6))
plt.plot(data.index, data['Precipitation_MA7'], label='7-Month MA Precipitation (mm/hr)')
plt.plot(data.index, data['ENSO'], label='ENSO Index', alpha=0.7)
plt.title('7-Month MA Precipitation and ENSO Index (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()

# 3. First-difference both series. A .dropna() on the right-hand side would
#    have no effect: assigning the result back as a column realigns it on the
#    index and restores the leading NaN, so the NaN rows are removed once, on
#    the whole frame, in the next step.
data['Precipitation_MA7_diff'] = data['Precipitation_MA7'].diff()
data['ENSO_diff'] = data['ENSO'].diff()
# Drop every row that has a NaN in any column (at least the first row).
data_diff = data.dropna()

# 4. Fit SARIMAX on the differenced precipitation with the differenced ENSO
#    series as exogenous regressor.
model_diff = SARIMAX(
    data_diff['Precipitation_MA7_diff'],
    exog=data_diff[['ENSO_diff']],
    order=(1, 0, 1),
    seasonal_order=(1, 0, 1, 12),
)
model_fit_diff = model_diff.fit(disp=False)

# 5. Inspect the fit: summary table, then ACF / PACF of the residuals.
print(model_fit_diff.summary())
residuals_diff = model_fit_diff.resid
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
plot_acf(residuals_diff, lags=30, ax=ax1)
plot_pacf(residuals_diff, lags=30, ax=ax2)
plt.show()

# First rows of the differenced frame.
print(data_diff.head())
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency M will be used. self._init_dates(dates, freq) c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency M will be used. self._init_dates(dates, freq)
SARIMAX Results
==========================================================================================
Dep. Variable: Precipitation_MA7_diff No. Observations: 251
Model: SARIMAX(1, 0, 1)x(1, 0, 1, 12) Log Likelihood 911.141
Date: Wed, 14 Aug 2024 AIC -1810.282
Time: 15:39:25 BIC -1789.129
Sample: 02-28-2003 HQIC -1801.770
- 12-31-2023
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ENSO_diff -0.0058 0.002 -2.607 0.009 -0.010 -0.001
ar.L1 0.5640 0.560 1.007 0.314 -0.534 1.662
ma.L1 -0.6280 0.541 -1.160 0.246 -1.689 0.433
ar.S.L12 0.1340 0.370 0.363 0.717 -0.590 0.858
ma.S.L12 -0.3000 0.357 -0.841 0.400 -0.999 0.399
sigma2 4.104e-05 3.63e-06 11.296 0.000 3.39e-05 4.82e-05
===================================================================================
Ljung-Box (L1) (Q): 1.31 Jarque-Bera (JB): 1.10
Prob(Q): 0.25 Prob(JB): 0.58
Heteroskedasticity (H): 1.68 Skew: 0.08
Prob(H) (two-sided): 0.02 Kurtosis: 3.28
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
ENSO Precipitation_MA7 Precipitation_MA7_diff ENSO_diff Date 2003-02-28 0.63 0.004238 0.010685 -0.29 2003-03-31 0.38 0.004613 0.000375 -0.25 2003-04-30 -0.04 -0.001631 -0.006244 -0.42 2003-05-31 -0.26 -0.006881 -0.005251 -0.22 2003-06-30 -0.16 -0.003071 0.003810 0.10
import scipy.stats as stats

# Differenced ENSO and precipitation series taken from data_diff.
enso_diff = data_diff['ENSO_diff']
precip_diff = data_diff['Precipitation_MA7_diff']

# Pearson r between the two differenced series, with its p-value.
correlation, p_value = stats.pearsonr(enso_diff, precip_diff)

# Report both numbers.
print("Pearson Correlation Coefficient: {}".format(correlation))
print("P-Value: {}".format(p_value))
Pearson Correlation Coefficient: -0.1601622206346122 P-Value: 0.011047053243263213
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the 7-month moving average stored in dfss.
adf_result = adfuller(dfss['7_month_MA'])
# Element 0 is printed as the test statistic and element 1 as the p-value.
print('ADF Statistic: %f' % adf_result[0])
print('p-value: %f' % adf_result[1])
ADF Statistic: -3.126609 p-value: 0.024646
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt

# Training period: every row dated before January 2021.
train = df_full[df_full["Date"] < '2021-01']

# SARIMAX on the 7-month moving average with ONI as exogenous regressor.
model = SARIMAX(
    train['7_month_MA'],
    exog=train['ONI'],
    order=(1, 0, 1),
    seasonal_order=(1, 0, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
results = model.fit()

# Hold-out period: January 2021 onwards (through 2023 for a 2003-2023 frame).
test = df_full[df_full["Date"] >= '2021-01']

# Forecast one step per hold-out row, feeding the hold-out ONI values as the
# future exogenous input.
predictions = results.get_forecast(steps=len(test), exog=test['ONI'])
predicted_mean = predictions.predicted_mean

# Full series, training part and forecast on one axis. The title states the
# actual forecast window (2021-2023); it used to say 2018-2023.
plt.figure(figsize=(14, 6))
plt.plot(df_full['Date'], df_full['7_month_MA'], label='7-Month MA Precipitation (mm/hr)')
plt.plot(train['Date'], train['7_month_MA'], label='Training Data', color='blue')
plt.plot(test['Date'], predicted_mean, label='Predictions', color='red')
plt.title('7-Month MA Precipitation Predictions (2021-2023)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()

# Observed hold-out values, for comparison with the forecast.
print(test['7_month_MA'])
216 0.027168 217 0.022613 218 0.008827 219 0.001762 220 0.026387 221 0.022341 222 0.011548 223 0.001744 224 0.010311 225 0.017702 226 0.028898 227 0.010548 228 0.016254 229 0.017414 230 0.023396 231 0.014846 232 0.012935 233 -0.002619 234 0.002975 235 0.009400 236 0.007919 237 0.014473 238 0.010490 239 0.008363 240 0.015061 241 0.011405 242 0.004982 243 0.001858 244 -0.008850 245 -0.005538 246 -0.003048 247 -0.018115 248 -0.007435 249 -0.010876 250 -0.002979 251 0.004983 Name: 7_month_MA, dtype: float64
# Observed versus forecast 7-month moving average over the hold-out rows.
plt.figure(figsize=(14, 6))
plt.plot(test['Date'], test['7_month_MA'], label='Actual', color='blue')
plt.plot(test['Date'], predicted_mean, label='Predicted', color='red')
# The labels above are only visible once a legend is drawn.
plt.legend()
# Render the figure when the file runs as a script, not only in a notebook.
plt.show()
[<matplotlib.lines.Line2D at 0x1fa51aeb790>]
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

# Date-indexed frame with the ONI index and the 7-month moving average.
data = pd.DataFrame({
    'Date': df_full['Date'],
    'ENSO': df_full['ONI'],
    'Precipitation': df_full['7_month_MA']
}).set_index('Date')

# Lagged Pearson correlation for lags -7 ... +7 months (inclusive on both
# sides; the upper bound of np.arange is exclusive, hence 8).
# Only ENSO is shifted. Shifting ENSO by `lag` and precipitation by `-lag` at
# the same time separates the two series by 2 * lag months, which does not
# match an x-axis labelled in months of lag.
# Sign convention: shift(lag) pairs ENSO at month t - lag with precipitation
# at month t, so a positive lag means ENSO leads precipitation.
lags = np.arange(-7, 8)
correlations = []
for lag in lags:
    shifted_enso = data['ENSO'].shift(lag).dropna()
    aligned_precipitation = data['Precipitation'].dropna()
    # Correlate only over dates present in both series.
    common_index = shifted_enso.index.intersection(aligned_precipitation.index)
    correlation = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
    correlations.append(correlation)

# Correlation as a function of lag, with reference lines through the origin.
plt.figure(figsize=(10, 5))
plt.plot(lags, correlations, marker='o', linestyle='-', color='b')
plt.title('Cross-Correlation between ENSO and Precipitation')
plt.xlabel('Lag (months)')
plt.ylabel('Correlation Coefficient')
plt.axhline(0, color='black', linewidth=0.5, linestyle='--')
plt.axvline(0, color='black', linewidth=0.5, linestyle='--')
plt.grid(True)
plt.show()
# Sanity check: Pearson correlation between ONI and the 7-month moving
# average with no lag applied.
data = pd.DataFrame({
    'Date': df_full['Date'],
    'ENSO': df_full['ONI'],
    'Precipitation': df_full['7_month_MA']
}).set_index('Date')

lag = 0
shifted_enso = data['ENSO'].shift(lag).dropna()
aligned_precipitation = data['Precipitation'].dropna()
# Correlate only over dates present in both series.
common_index = shifted_enso.index.intersection(aligned_precipitation.index)
correlation = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
# The value is only printed. Appending it to `correlations` would add an
# extra entry to the per-lag list and put it out of step with `lags`.
print(correlation)
-0.5409307518077191